# -*- coding: utf-8 -*-
"""
Created on Wed Dec 26 14:27:13 2018

@author: MaYuling
"""
import requests
import time
from requests.exceptions import RequestException
import re
import json

def get_one_page(url):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-browser ``user-agent`` header and a
    ``(3, 5)`` timeout tuple (connect timeout, read timeout, in seconds).

    Args:
        url: Address of the page to fetch.

    Returns:
        ``response.text`` when the server answers with status code 200;
        ``None`` for any other status code or when the request raises
        ``RequestException``.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'}
    # Keep the try body down to the one call that can raise.
    try:
        response = requests.get(url=url, headers=headers, timeout=(3, 5))
    except RequestException as exc:
        # Best-effort fetch: report the failure instead of hiding it, then
        # let the caller carry on with the next page.
        print('request failed for %s: %s' % (url, exc))
        return None
    if response.status_code == 200:
        return response.text
    return None
# Matches anchors shaped like
#   <a href="...cd.lianjia.com/chengjiao/<id>.html" ... _blank">...</a>
# and captures <id>.  The capture is limited to a single path segment so it
# cannot run across tags when an earlier link under /chengjiao/ does not end
# in ".html"; literal dots are escaped.
_HOUSE_ID_PATTERN = re.compile(
    r'<a href.*?cd\.lianjia\.com/chengjiao/([^/"\'\s<>]+?)'
    r'\.html.*?_blank">.*?</a>',
    re.S,
)


def parse_one_page(html):
    """Extract the ids of ``/chengjiao/<id>.html`` links from a page.

    Every id found is printed and, when a module-level list named
    ``houseid`` exists, appended to it (the script entry point creates that
    list before crawling).

    Args:
        html: Page source as a string.  ``None`` or an empty string (for
            example after a failed download) is treated as "no ids".

    Returns:
        The list of ids found on this page, in document order; duplicates
        are kept.
    """
    if not html:
        return []
    item_ids = _HOUSE_ID_PATTERN.findall(html)
    for item_id in item_ids:
        print(item_id)
    # The accumulator is optional so the function can also be called when the
    # module is imported rather than run as a script.
    collected = globals().get('houseid')
    if collected is not None:
        collected.extend(item_ids)
    return item_ids
def main(i, htm):
    """Fetch one listing page and hand its source to the id parser.

    The URL is built as ``https://cd.lianjia.com/chengjiao/pg`` followed by
    the page number and then *htm*; it is printed before the download.

    Args:
        i: Page number placed right after ``pg`` in the URL.
        htm: Text appended after the page number (the script passes a
            filter code followed by ``/``).
    """
    # NOTE(review): whether the site accepts every filter code in this
    # "pg<N><code>/" position cannot be confirmed from this file.
    target = ''.join(('https://cd.lianjia.com/chengjiao/pg', str(i), htm))
    print(target)
    parse_one_page(get_one_page(target))
def write_to_file(houseid, path=r'C:\Users\MaYuling\Desktop\houseid.txt'):
    """Append *houseid* to a UTF-8 text file as one JSON document per line.

    The file is opened in append mode, so earlier content is kept.  Each call
    writes the JSON encoding of *houseid* followed by a newline; the newline
    keeps the dumps of successive runs separable instead of fusing them into
    one unparseable string.

    Args:
        houseid: JSON-serialisable object to store (the script passes a list
            of id strings).
        path: Destination file.  Defaults to the location the script has
            always written to.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(houseid))
        f.write('\n')

if __name__ == '__main__':
    # Module-level accumulator that parse_one_page() appends to.
    houseid = []

    # Path-style codes, tried one by one after the unfiltered listing ('').
    districts = [
        'jinjiang', 'qingyang', 'wuhou', 'gaoxin7', 'chenghua', 'jinniu',
        'tianfuxinqu', 'gaoxinxi1', 'shuangliu', 'wenjiang', 'pidou',
        'longquanyi', 'xindou',
    ]
    # (prefix, highest number) pairs expanded to prefix1 .. prefixN; the
    # meaning of each code is defined by the site's URL scheme, not here.
    numbered = [
        ('p', 8), ('a', 8), ('l', 5), ('sf', 5), ('lc', 3),
        ('f', 5), ('y', 5), ('ie', 2), ('de', 3),
    ]
    page = [''] + districts + [
        prefix + str(number)
        for prefix, highest in numbered
        for number in range(1, highest + 1)
    ]

    for code in page:
        tail = code + '/'
        # Pages 1 through 100 for every code, with a short pause per request.
        for page_no in range(1, 101):
            main(page_no, tail)
            time.sleep(0.1)

    house_id = list(set(houseid))  # de-duplicate the collected ids
    write_to_file(house_id)
